library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.1.0 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(broom)
gapminderContains subset of the data on five year intervals from 1952 to 2007.
library(gapminder)
glimpse(gapminder)
## Rows: 1,704
## Columns: 6
## $ country <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
## $ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
## $ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …
Australia was already had one of the top life expectancies in the 1950s.
oz <- gapminder %>% filter(country == "Australia")
oz
## # A tibble: 12 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Australia Oceania 1952 69.1 8691212 10040.
## 2 Australia Oceania 1957 70.3 9712569 10950.
## 3 Australia Oceania 1962 70.9 10794968 12217.
## 4 Australia Oceania 1967 71.1 11872264 14526.
## 5 Australia Oceania 1972 71.9 13177000 16789.
## 6 Australia Oceania 1977 73.5 14074100 18334.
## 7 Australia Oceania 1982 74.7 15184200 19477.
## 8 Australia Oceania 1987 76.3 16257249 21889.
## 9 Australia Oceania 1992 77.6 17481977 23425.
## 10 Australia Oceania 1997 78.8 18565243 26998.
## 11 Australia Oceania 2002 80.4 19546792 30688.
## 12 Australia Oceania 2007 81.2 20434176 34435.
ggplot(data = oz,
aes(x = year,
y = lifeExp)) +
geom_line()
lifeExp ~ yearoz_lm <- lm(lifeExp ~ year, data = oz)
oz_lm
##
## Call:
## lm(formula = lifeExp ~ year, data = oz)
##
## Coefficients:
## (Intercept) year
## -376.1163 0.2277
summary(oz_lm)
##
## Call:
## lm(formula = lifeExp ~ year, data = oz)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0250 -0.5201 0.1162 0.3781 0.7909
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -376.11630 20.54716 -18.30 5.09e-09 ***
## year 0.22772 0.01038 21.94 8.67e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6206 on 10 degrees of freedom
## Multiple R-squared: 0.9796, Adjusted R-squared: 0.9776
## F-statistic: 481.3 on 1 and 10 DF, p-value: 8.667e-10
tidy(oz_lm)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -376. 20.5 -18.3 5.09e- 9
## 2 year 0.228 0.0104 21.9 8.67e-10
\[\widehat{lifeExp} = -376.1163 + 0.2277~year\]
Let us treat 1950 is the first year, so for model fitting we are going to shift year to begin in 1950, makes interpretability easier.
gap <- gapminder %>% mutate(year1950 = year - 1950)
oz <- gap %>% filter(country == "Australia")
oz_lm <- lm(lifeExp ~ year1950, data = oz)
oz_lm
##
## Call:
## lm(formula = lifeExp ~ year1950, data = oz)
##
## Coefficients:
## (Intercept) year1950
## 67.9451 0.2277
tidy(oz_lm)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 67.9 0.355 192. 3.70e-19
## 2 year1950 0.228 0.0104 21.9 8.67e-10
\[\widehat{lifeExp} = 67.9 + 0.2277~year1950 \] ### Q8. Extract residuals and fitted values using augment()
oz_aug <- augment(oz_lm, oz)
oz_aug
## # A tibble: 12 x 13
## country continent year lifeExp pop gdpPercap year1950 .fitted .resid
## <fct> <fct> <int> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Australia Oceania 1952 69.1 8691212 10040. 2 68.4 0.719
## 2 Australia Oceania 1957 70.3 9712569 10950. 7 69.5 0.791
## 3 Australia Oceania 1962 70.9 10794968 12217. 12 70.7 0.252
## 4 Australia Oceania 1967 71.1 11872264 14526. 17 71.8 -0.716
## 5 Australia Oceania 1972 71.9 13177000 16789. 22 73.0 -1.02
## 6 Australia Oceania 1977 73.5 14074100 18334. 27 74.1 -0.604
## 7 Australia Oceania 1982 74.7 15184200 19477. 32 75.2 -0.492
## 8 Australia Oceania 1987 76.3 16257249 21889. 37 76.4 -0.0508
## 9 Australia Oceania 1992 77.6 17481977 23425. 42 77.5 0.0505
## 10 Australia Oceania 1997 78.8 18565243 26998. 47 78.6 0.182
## 11 Australia Oceania 2002 80.4 19546792 30688. 52 79.8 0.583
## 12 Australia Oceania 2007 81.2 20434176 34435. 57 80.9 0.310
## # … with 4 more variables: .hat <dbl>, .sigma <dbl>, .cooksd <dbl>,
## # .std.resid <dbl>
ggplot(data = oz_aug,
aes(x = year,
y = .fitted)) +
geom_line(colour = "blue") +
geom_point(aes(x = year,
y = lifeExp))
ggplot(oz_aug,
aes(x = .fitted, y = .resid)) +
ylim(c(-5,5)) +
geom_point()
# Another way to look at Residuals: .resid with year:
ggplot(data = oz_aug,
aes(x = year,
y = .std.resid)) +
ylim(c(-5,5)) +
geom_hline(yintercept = 0,
colour = "white",
size = 2) +
geom_line()
Life expectancy has increased 2.3 years every decade, on average.
There was a slow period from 1960 through to 1972, probably related to mortality during the Vietnam war.
nz <- gap %>% filter(country == "New Zealand")
nz_lm <- lm(lifeExp ~ year1950, data = nz)
nz_lm
##
## Call:
## lm(formula = lifeExp ~ year1950, data = nz)
##
## Coefficients:
## (Intercept) year1950
## 68.3013 0.1928
japan <- gap %>% filter(country == "Japan")
japan_lm <- lm(lifeExp ~ year1950, data = japan)
japan_lm
##
## Call:
## lm(formula = lifeExp ~ year1950, data = japan)
##
## Coefficients:
## (Intercept) year1950
## 64.4162 0.3529
italy <- gap %>% filter(country == "Italy")
italy_lm <- lm(lifeExp ~ year1950, data = italy)
italy_lm
##
## Call:
## lm(formula = lifeExp ~ year1950, data = italy)
##
## Coefficients:
## (Intercept) year1950
## 66.0574 0.2697
by_country <- gap %>%
select(country, year1950, lifeExp, continent) %>%
group_by(country, continent) %>%
nest()
by_country
## # A tibble: 142 x 3
## # Groups: country, continent [142]
## country continent data
## <fct> <fct> <list>
## 1 Afghanistan Asia <tibble[,2] [12 × 2]>
## 2 Albania Europe <tibble[,2] [12 × 2]>
## 3 Algeria Africa <tibble[,2] [12 × 2]>
## 4 Angola Africa <tibble[,2] [12 × 2]>
## 5 Argentina Americas <tibble[,2] [12 × 2]>
## 6 Australia Oceania <tibble[,2] [12 × 2]>
## 7 Austria Europe <tibble[,2] [12 × 2]>
## 8 Bahrain Asia <tibble[,2] [12 × 2]>
## 9 Bangladesh Asia <tibble[,2] [12 × 2]>
## 10 Belgium Europe <tibble[,2] [12 × 2]>
## # … with 132 more rows
data?by_country$data[[1]]
## # A tibble: 12 x 2
## year1950 lifeExp
## <dbl> <dbl>
## 1 2 28.8
## 2 7 30.3
## 3 12 32.0
## 4 17 34.0
## 5 22 36.1
## 6 27 38.4
## 7 32 39.9
## 8 37 40.8
## 9 42 41.7
## 10 47 41.8
## 11 52 42.1
## 12 57 43.8
lm_afganistan <- lm(lifeExp ~ year1950, data = by_country$data[[1]])
lm_albania <- lm(lifeExp ~ year1950, data = by_country$data[[2]])
lm_algeria <- lm(lifeExp ~ year1950, data = by_country$data[[3]])
map(<data object>, <function>)
by_countryfit_lm <- function(x){
lm(lifeExp ~ year1950, data = x)
}
mapped_lm <- map(by_country$data, fit_lm)
mapped_lm
## [[1]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 29.3566 0.2753
##
##
## [[2]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 58.5598 0.3347
##
##
## [[3]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 42.2364 0.5693
##
##
## [[4]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 31.7080 0.2093
##
##
## [[5]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 62.2250 0.2317
##
##
## [[6]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 67.9451 0.2277
##
##
## [[7]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 65.964 0.242
##
##
## [[8]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 51.8142 0.4675
##
##
## [[9]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 35.1392 0.4981
##
##
## [[10]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 67.4738 0.2091
##
##
## [[11]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 38.9200 0.3342
##
##
## [[12]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 37.7566 0.4999
##
##
## [[13]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 57.3901 0.3498
##
##
## [[14]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 52.80778 0.06067
##
##
## [[15]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 50.7319 0.3901
##
##
## [[16]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 65.4459 0.1457
##
##
## [[17]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 33.957 0.364
##
##
## [[18]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 40.2704 0.1541
##
##
## [[19]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 36.2236 0.3959
##
##
## [[20]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 40.7492 0.2501
##
##
## [[21]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 68.4461 0.2189
##
##
## [[22]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 38.4417 0.1839
##
##
## [[23]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 39.3029 0.2532
##
##
## [[24]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 53.3640 0.4768
##
##
## [[25]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 46.1291 0.5307
##
##
## [[26]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 52.6656 0.3808
##
##
## [[27]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 39.0952 0.4504
##
##
## [[28]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 41.77325 0.09392
##
##
## [[29]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 46.7466 0.1951
##
##
## [[30]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 58.2991 0.4028
##
##
## [[31]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 44.5847 0.1306
##
##
## [[32]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 63.4049 0.2255
##
##
## [[33]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 61.5711 0.3212
##
##
## [[34]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 67.2384 0.1448
##
##
## [[35]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 70.7909 0.1213
##
##
## [[36]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 35.5423 0.3674
##
##
## [[37]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 47.6555 0.4712
##
##
## [[38]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 48.0653 0.5001
##
##
## [[39]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 39.8571 0.5555
##
##
## [[40]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 45.5577 0.4771
##
##
## [[41]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 33.8100 0.3102
##
##
## [[42]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 34.9459 0.3747
##
##
## [[43]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 35.4138 0.3072
##
##
## [[44]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 65.9731 0.2379
##
##
## [[45]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 67.3131 0.2385
##
##
## [[46]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 38.0419 0.4467
##
##
## [[47]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 27.2367 0.5818
##
##
## [[48]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 67.1408 0.2137
##
##
## [[49]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 42.8493 0.3217
##
##
## [[50]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 66.5824 0.2424
##
##
## [[51]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 41.0569 0.5313
##
##
## [[52]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 30.7073 0.4248
##
##
## [[53]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 31.1935 0.2718
##
##
## [[54]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 38.4520 0.3971
##
##
## [[55]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 41.9067 0.5429
##
##
## [[56]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 62.697 0.366
##
##
## [[57]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 65.7455 0.1236
##
##
## [[58]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 71.6328 0.1654
##
##
## [[59]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 38.2591 0.5053
##
##
## [[60]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 35.6138 0.6346
##
##
## [[61]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 43.9857 0.4966
##
##
## [[62]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 49.6430 0.2352
##
##
## [[63]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 67.1432 0.1991
##
##
## [[64]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 65.7662 0.2671
##
##
## [[65]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 66.0574 0.2697
##
##
## [[66]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 62.2182 0.2214
##
##
## [[67]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 64.4162 0.3529
##
##
## [[68]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 42.9204 0.5717
##
##
## [[69]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 46.5890 0.2065
##
##
## [[70]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 54.2727 0.3164
##
##
## [[71]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 48.6167 0.5554
##
##
## [[72]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 56.6257 0.4168
##
##
## [[73]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 58.165 0.261
##
##
## [[74]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 47.18789 0.09557
##
##
## [[75]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 39.64444 0.09599
##
##
## [[76]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 40.8509 0.6255
##
##
## [[77]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 35.8606 0.4037
##
##
## [[78]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 36.4419 0.2342
##
##
## [[79]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 50.5762 0.4645
##
##
## [[80]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 32.2976 0.3768
##
##
## [[81]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 39.1328 0.4464
##
##
## [[82]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 54.6739 0.3485
##
##
## [[83]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 52.103 0.451
##
##
## [[84]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 42.9490 0.4387
##
##
## [[85]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 61.656 0.293
##
##
## [[86]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 41.6059 0.5425
##
##
## [[87]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 33.7572 0.2245
##
##
## [[88]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 40.5454 0.4331
##
##
## [[89]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 46.6720 0.2312
##
##
## [[90]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 33.3731 0.5293
##
##
## [[91]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 71.6162 0.1367
##
##
## [[92]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 68.3013 0.1928
##
##
## [[93]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 41.9321 0.5565
##
##
## [[94]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 34.4664 0.3421
##
##
## [[95]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 37.4434 0.2081
##
##
## [[96]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 71.9507 0.1319
##
##
## [[97]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 35.6634 0.7722
##
##
## [[98]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 42.9114 0.4058
##
##
## [[99]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 57.3526 0.3542
##
##
## [[100]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 62.1671 0.1574
##
##
## [[101]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 43.2922 0.5277
##
##
## [[102]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 48.5634 0.4205
##
##
## [[103]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 64.3885 0.1962
##
##
## [[104]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 60.4724 0.3372
##
##
## [[105]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 66.5274 0.2106
##
##
## [[106]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 53.0778 0.4599
##
##
## [[107]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 63.6473 0.1574
##
##
## [[108]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 42.83361 -0.04583
##
##
## [[109]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 47.8462 0.3407
##
##
## [[110]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 39.5149 0.6496
##
##
## [[111]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 35.7373 0.5047
##
##
## [[112]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 61.0240 0.2552
##
##
## [[113]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 30.455 0.214
##
##
## [[114]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 61.1641 0.3409
##
##
## [[115]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 66.742 0.134
##
##
## [[116]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 65.6853 0.2005
##
##
## [[117]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 34.2163 0.2296
##
##
## [[118]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 49.0030 0.1692
##
##
## [[119]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 65.9160 0.2809
##
##
## [[120]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 59.3017 0.2449
##
##
## [[121]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 37.1086 0.3828
##
##
## [[122]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 46.19771 0.09507
##
##
## [[123]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 71.2725 0.1663
##
##
## [[124]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 69.0093 0.2222
##
##
## [[125]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 44.9926 0.5544
##
##
## [[126]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 60.6829 0.3272
##
##
## [[127]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 42.7590 0.1747
##
##
## [[128]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 51.962 0.347
##
##
## [[129]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 40.2123 0.3826
##
##
## [[130]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 61.7050 0.1737
##
##
## [[131]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 43.3796 0.5878
##
##
## [[132]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 45.0278 0.4972
##
##
## [[133]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 44.0320 0.1216
##
##
## [[134]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 68.437 0.186
##
##
## [[135]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 68.0455 0.1842
##
##
## [[136]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 65.3751 0.1833
##
##
## [[137]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 56.8539 0.3297
##
##
## [[138]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 37.6668 0.6716
##
##
## [[139]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 42.5962 0.6011
##
##
## [[140]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 28.9194 0.6055
##
##
## [[141]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 47.77888 -0.06043
##
##
## [[142]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = x)
##
## Coefficients:
## (Intercept) year1950
## 55.40729 -0.09302
country_model <- by_country %>%
mutate(model = map(data, function(x){
lm(lifeExp ~ year1950, data = x)
})
)
country_model
## # A tibble: 142 x 4
## # Groups: country, continent [142]
## country continent data model
## <fct> <fct> <list> <list>
## 1 Afghanistan Asia <tibble[,2] [12 × 2]> <lm>
## 2 Albania Europe <tibble[,2] [12 × 2]> <lm>
## 3 Algeria Africa <tibble[,2] [12 × 2]> <lm>
## 4 Angola Africa <tibble[,2] [12 × 2]> <lm>
## 5 Argentina Americas <tibble[,2] [12 × 2]> <lm>
## 6 Australia Oceania <tibble[,2] [12 × 2]> <lm>
## 7 Austria Europe <tibble[,2] [12 × 2]> <lm>
## 8 Bahrain Asia <tibble[,2] [12 × 2]> <lm>
## 9 Bangladesh Asia <tibble[,2] [12 × 2]> <lm>
## 10 Belgium Europe <tibble[,2] [12 × 2]> <lm>
## # … with 132 more rows
country_model <- by_country %>%
mutate(model = map(data, ~lm(lifeExp ~ year1950, data = .)))
country_model
## # A tibble: 142 x 4
## # Groups: country, continent [142]
## country continent data model
## <fct> <fct> <list> <list>
## 1 Afghanistan Asia <tibble[,2] [12 × 2]> <lm>
## 2 Albania Europe <tibble[,2] [12 × 2]> <lm>
## 3 Algeria Africa <tibble[,2] [12 × 2]> <lm>
## 4 Angola Africa <tibble[,2] [12 × 2]> <lm>
## 5 Argentina Americas <tibble[,2] [12 × 2]> <lm>
## 6 Australia Oceania <tibble[,2] [12 × 2]> <lm>
## 7 Austria Europe <tibble[,2] [12 × 2]> <lm>
## 8 Bahrain Asia <tibble[,2] [12 × 2]> <lm>
## 9 Bangladesh Asia <tibble[,2] [12 × 2]> <lm>
## 10 Belgium Europe <tibble[,2] [12 × 2]> <lm>
## # … with 132 more rows
country_model$model[[1]]
##
## Call:
## lm(formula = lifeExp ~ year1950, data = .)
##
## Coefficients:
## (Intercept) year1950
## 29.3566 0.2753
tidy(country_model$model[[1]])
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 29.4 0.699 42.0 1.40e-12
## 2 year1950 0.275 0.0205 13.5 9.84e- 8
tidy(country_model$model[[1]])
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 29.4 0.699 42.0 1.40e-12
## 2 year1950 0.275 0.0205 13.5 9.84e- 8
tidy(country_model$model[[2]])
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 58.6 1.13 51.7 1.79e-13
## 2 year1950 0.335 0.0332 10.1 1.46e- 6
tidy(country_model$model[[3]])
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 42.2 0.756 55.8 8.22e-14
## 2 year1950 0.569 0.0221 25.7 1.81e-10
country_model %>%
mutate(tidy = map(model, tidy))
## # A tibble: 142 x 5
## # Groups: country, continent [142]
## country continent data model tidy
## <fct> <fct> <list> <list> <list>
## 1 Afghanistan Asia <tibble[,2] [12 × 2]> <lm> <tibble[,5] [2 × 5]>
## 2 Albania Europe <tibble[,2] [12 × 2]> <lm> <tibble[,5] [2 × 5]>
## 3 Algeria Africa <tibble[,2] [12 × 2]> <lm> <tibble[,5] [2 × 5]>
## 4 Angola Africa <tibble[,2] [12 × 2]> <lm> <tibble[,5] [2 × 5]>
## 5 Argentina Americas <tibble[,2] [12 × 2]> <lm> <tibble[,5] [2 × 5]>
## 6 Australia Oceania <tibble[,2] [12 × 2]> <lm> <tibble[,5] [2 × 5]>
## 7 Austria Europe <tibble[,2] [12 × 2]> <lm> <tibble[,5] [2 × 5]>
## 8 Bahrain Asia <tibble[,2] [12 × 2]> <lm> <tibble[,5] [2 × 5]>
## 9 Bangladesh Asia <tibble[,2] [12 × 2]> <lm> <tibble[,5] [2 × 5]>
## 10 Belgium Europe <tibble[,2] [12 × 2]> <lm> <tibble[,5] [2 × 5]>
## # … with 132 more rows
country_coefs <- country_model %>%
mutate(tidy = map(model, tidy)) %>%
unnest(tidy) %>%
select(country, continent, term, estimate)
country_coefs
## # A tibble: 284 x 4
## # Groups: country, continent [142]
## country continent term estimate
## <fct> <fct> <chr> <dbl>
## 1 Afghanistan Asia (Intercept) 29.4
## 2 Afghanistan Asia year1950 0.275
## 3 Albania Europe (Intercept) 58.6
## 4 Albania Europe year1950 0.335
## 5 Algeria Africa (Intercept) 42.2
## 6 Algeria Africa year1950 0.569
## 7 Angola Africa (Intercept) 31.7
## 8 Angola Africa year1950 0.209
## 9 Argentina Americas (Intercept) 62.2
## 10 Argentina Americas year1950 0.232
## # … with 274 more rows
tidy_country_coefs <- country_coefs %>%
pivot_wider(id_cols = c(term, country, continent),
names_from = term,
values_from = estimate) %>%
rename(intercept = `(Intercept)`)
tidy_country_coefs
## # A tibble: 142 x 4
## # Groups: country, continent [142]
## country continent intercept year1950
## <fct> <fct> <dbl> <dbl>
## 1 Afghanistan Asia 29.4 0.275
## 2 Albania Europe 58.6 0.335
## 3 Algeria Africa 42.2 0.569
## 4 Angola Africa 31.7 0.209
## 5 Argentina Americas 62.2 0.232
## 6 Australia Oceania 67.9 0.228
## 7 Austria Europe 66.0 0.242
## 8 Bahrain Asia 51.8 0.468
## 9 Bangladesh Asia 35.1 0.498
## 10 Belgium Europe 67.5 0.209
## # … with 132 more rows
tidy_country_coefs %>%
filter(country == "Australia")
## # A tibble: 1 x 4
## # Groups: country, continent [1]
## country continent intercept year1950
## <fct> <fct> <dbl> <dbl>
## 1 Australia Oceania 67.9 0.228
country_aug <- country_model %>%
mutate(augmented = map(model, augment)) %>%
unnest(augmented)
country_aug
## # A tibble: 1,704 x 12
## # Groups: country, continent [142]
## country continent data model lifeExp year1950 .fitted .resid .hat .sigma
## <fct> <fct> <lis> <lis> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghani… Asia <tib… <lm> 28.8 2 29.9 -1.11 0.295 1.21
## 2 Afghani… Asia <tib… <lm> 30.3 7 31.3 -0.952 0.225 1.24
## 3 Afghani… Asia <tib… <lm> 32.0 12 32.7 -0.664 0.169 1.27
## 4 Afghani… Asia <tib… <lm> 34.0 17 34.0 -0.0172 0.127 1.29
## 5 Afghani… Asia <tib… <lm> 36.1 22 35.4 0.674 0.0991 1.27
## 6 Afghani… Asia <tib… <lm> 38.4 27 36.8 1.65 0.0851 1.15
## 7 Afghani… Asia <tib… <lm> 39.9 32 38.2 1.69 0.0851 1.15
## 8 Afghani… Asia <tib… <lm> 40.8 37 39.5 1.28 0.0991 1.21
## 9 Afghani… Asia <tib… <lm> 41.7 42 40.9 0.754 0.127 1.26
## 10 Afghani… Asia <tib… <lm> 41.8 47 42.3 -0.534 0.169 1.27
## # … with 1,694 more rows, and 2 more variables: .cooksd <dbl>, .std.resid <dbl>
p1 <- gapminder %>%
ggplot(aes(year, lifeExp, group = country)) +
geom_line(alpha = 1/3) + ggtitle("Data")
p2 <- ggplot(country_aug) +
geom_line(aes(x = year1950 + 1950,
y = .fitted,
group = country),
alpha = 1/3) +
xlab("year") +
ggtitle("Fitted models")
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
grid.arrange(p1, p2, ncol=2)
p <- ggplot(tidy_country_coefs,
aes(x = intercept,
y = year1950,
colour = continent,
label = country)) +
geom_point(alpha = 0.5,
size = 2) +
scale_color_brewer(palette = "Dark2")
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
ggplotly(p)
country_glance <- country_model %>%
mutate(glance = map(model, glance)) %>%
unnest(glance)
country_glance
## # A tibble: 142 x 16
## # Groups: country, continent [142]
## country continent data model r.squared adj.r.squared sigma statistic
## <fct> <fct> <list> <lis> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanis… Asia <tibble[,2… <lm> 0.948 0.942 1.22 181.
## 2 Albania Europe <tibble[,2… <lm> 0.911 0.902 1.98 102.
## 3 Algeria Africa <tibble[,2… <lm> 0.985 0.984 1.32 662.
## 4 Angola Africa <tibble[,2… <lm> 0.888 0.877 1.41 79.1
## 5 Argentina Americas <tibble[,2… <lm> 0.996 0.995 0.292 2246.
## 6 Australia Oceania <tibble[,2… <lm> 0.980 0.978 0.621 481.
## 7 Austria Europe <tibble[,2… <lm> 0.992 0.991 0.407 1261.
## 8 Bahrain Asia <tibble[,2… <lm> 0.967 0.963 1.64 291.
## 9 Banglade… Asia <tibble[,2… <lm> 0.989 0.988 0.977 930.
## 10 Belgium Europe <tibble[,2… <lm> 0.995 0.994 0.293 1822.
## # … with 132 more rows, and 8 more variables: p.value <dbl>, df <dbl>,
## # logLik <dbl>, AIC <dbl>, BIC <dbl>, deviance <dbl>, df.residual <int>,
## # nobs <int>
ggplot(country_glance,
aes(x = r.squared)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Examine the countries with the worst fit, countries with \(R^2<0.45\), by making scatterplots of the data, with the linear model overlaid.
badfit <- country_glance %>% filter(r.squared <= 0.45)
gap_bad <- gap %>% filter(country %in% badfit$country)
gg_bad_fit <-
ggplot(data = gap_bad,
aes(x = year,
y = lifeExp)) +
geom_point() +
facet_wrap(~country) +
scale_x_continuous(breaks = seq(1950,2000,10),
labels = c("1950", "60","70", "80","90","2000")) +
geom_smooth(method = "lm",
se = FALSE)
gg_bad_fit
## `geom_smooth()` using formula 'y ~ x'
Each of these countries had been moving on a nice trajectory of increasing life expectancy, and then suffered a big dip during the time period.